Note: 1. It is expected that low number of cells for DT condition. We have seen less cells after sorting.

1 general steps

knitr::opts_chunk$set(warning=FALSE, messgae=FALSE, fig.path='Figs/', results = "hide")
## fig.width=4, fig.height=4

1.1 load library

2 working directory

system_dir = "/home/jyu/rstudio/"
working_dir = paste0(system_dir,"/")
# published_data_dir = paste0(system_dir,"/general_scripts/publised_datasets/")
# global_ref_dir =paste0(system_dir,"/general_scripts/Global_ref_data/")
# gsea_pathway_dir = paste0(system_dir,"/general_scripts/Global_ref_data/")

# source(paste0(global_ref_dir,"general_functions.R"))

3 import for seurat

3.1 DT

library(Seurat)
tmp_count = Read10X(data.dir = paste0(working_dir,"/Dec2021_10x/DT/"))

DT_seurat = CreateSeuratObject(counts = tmp_count,
                                project = "DT")

print(paste0("number of cells: ", ncol(DT_seurat)))

[1] “number of cells: 1968”

rm(tmp_count)

3.2 IL

tmp_count = Read10X(data.dir = paste0(working_dir,"/Dec2021_10x/IL/"))

IL_seurat = CreateSeuratObject(counts = tmp_count,
                                project = "IL")
print(paste0("number of cells: ", ncol(IL_seurat)))

[1] “number of cells: 7438”

rm(tmp_count)

3.3 WT

tmp_count = Read10X(data.dir = paste0(working_dir,"/Dec2021_10x/WT/"))

WT_seurat = CreateSeuratObject(counts = tmp_count,
                                project = "WT")
print(paste0("number of cells: ", ncol(WT_seurat)))

[1] “number of cells: 5619”

rm(tmp_count)

4 basic qc and filtering

4.1 DT

mt% < 10%, 1000< nFeature_RNA < 6000

DT_seurat[["percent.mt"]] <- PercentageFeatureSet(DT_seurat, pattern = "^mt-")
VlnPlot(DT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))

FeatureScatter(DT_seurat,feature2 = "nFeature_RNA", feature1 = "nCount_RNA")

DT_seurat = subset(DT_seurat,subset=percent.mt < 10)
DT_seurat = subset(DT_seurat, subset=nFeature_RNA >1000 & nFeature_RNA < 6000)

VlnPlot(DT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))

print(paste0("number of cells after filtering: ", ncol(DT_seurat)))

[1] “number of cells after filtering: 1318”

4.2 IL

mt% < 10%, 500< nFeature_RNA < 6000

IL_seurat[["percent.mt"]] <- PercentageFeatureSet(IL_seurat, pattern = "^mt-")
VlnPlot(IL_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))

FeatureScatter(IL_seurat,feature2 = "nFeature_RNA", feature1 = "nCount_RNA")

IL_seurat = subset(IL_seurat,subset=percent.mt < 10)
IL_seurat = subset(IL_seurat, subset=nFeature_RNA >500 & nFeature_RNA < 6000)

VlnPlot(IL_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))

print(paste0("number of cells after filtering: ", ncol(IL_seurat)))

[1] “number of cells after filtering: 4538”

4.3 WT

mt% < 10%, 500< nFeature_RNA < 6000

WT_seurat[["percent.mt"]] <- PercentageFeatureSet(WT_seurat, pattern = "^mt-")
VlnPlot(WT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))

FeatureScatter(WT_seurat,feature2 = "nFeature_RNA", feature1 = "nCount_RNA")

WT_seurat = subset(WT_seurat,subset=percent.mt < 10)
WT_seurat = subset(WT_seurat, subset=nFeature_RNA >500 & nFeature_RNA < 6000)

VlnPlot(WT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))

print(paste0("number of cells after filtering: ", ncol(WT_seurat)))

[1] “number of cells after filtering: 3313”

5 integrate into one seurat object

CAR_seurat = merge(x=WT_seurat,y=c(IL_seurat,DT_seurat),add.cell.ids = c("WT", "IL","DT"), project = "Kanako_CAR")

### normalization and scaling
#normalize data
CAR_seurat = CAR_seurat %>% NormalizeData(., normalization.method = "LogNormalize", scale.factor = 10000)

# find variable genes
CAR_seurat <- FindVariableFeatures(CAR_seurat, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
# plot variable features with labels
LabelPoints(plot = VariableFeaturePlot(CAR_seurat),
            points = head(VariableFeatures(CAR_seurat), 10),
            repel = TRUE)

# scale data
CAR_seurat <- ScaleData(CAR_seurat, features = VariableFeatures(object = CAR_seurat))
# PCA
CAR_seurat <- RunPCA(CAR_seurat, features = VariableFeatures(object = CAR_seurat))

# Examine and visualize PCA results a few different ways
# print(CAR_seurat[["pca"]], dims = 1:5, nfeatures = 5)

VizDimLoadings(CAR_seurat, dims = 1:2, reduction = "pca")

# DimPlot(CAR_seurat, reduction = "pca",dims = c(1,3))
DimPlot(CAR_seurat, reduction = "pca",dims = c(1,3),group.by = "orig.ident")

# determine dimensions
  # NOTE: This process can take a long time for big datasets, comment out for expediency. More approximate techniques such as those implemented in ElbowPlot() can be used to reduce computation time
#CAR_seurat <- JackStraw(CAR_seurat, num.replicate = 100)
#CAR_seurat <- ScoreJackStraw(CAR_seurat, dims = 1:20)


### cluster and UMAP

#### search for the perfect clusters

set.seed(123)
# JackStrawPlot(CAR_seurat, dims = 1:15)
ElbowPlot(CAR_seurat)

# cluster cells
CAR_seurat <- FindNeighbors(CAR_seurat, dims = 1:30)

### use the resolution = 0.25

set.seed(123)
CAR_seurat <- FindClusters(CAR_seurat, resolution = 0.3)
#UMAP
CAR_seurat <- RunUMAP(CAR_seurat,
                     dims = 1:30,
                     n.neighbors = 30,
                     min.dist = 1,
                     n.components = 3,
                     spread = 3)

DimPlot(CAR_seurat,dims = c(1,3),label = TRUE)

# saveRDS(CAR_seurat,file = paste0(working_dir,"Dec10x_CAR_seurat.rds"))

# CAR_seurat = readRDS(file = paste0(working_dir,"Dec10x_CAR_seurat.rds"))

5.1 UMAP per condition

## same amount of cells per condition
cell_selected = CAR_seurat@meta.data[unlist(tapply(1:nrow(CAR_seurat@meta.data),CAR_seurat@meta.data$orig.ident,function(x) sample(x,1000))),]

CAR_seurat$orig.ident = factor(CAR_seurat$orig.ident,levels = c("WT","IL","DT"))

DimPlot(CAR_seurat[, rownames(cell_selected)], reduction = "umap",split.by = "orig.ident", dims = c(2,3),group.by = "seurat_clusters",label = TRUE)

5.2 mt% per cluster

VlnPlot(CAR_seurat,features = c("percent.mt"))

5.2.1 number of cells per cluster

library(knitr)
kable(table(CAR_seurat@active.ident,CAR_seurat$orig.ident))
WT IL DT
0 480 1147 237
1 297 691 405
2 540 355 45
3 437 414 70
4 292 464 152
5 354 295 91
6 258 294 130
7 153 440 23
8 70 172 37
9 105 112 39
10 187 30 29
11 36 34 29
12 34 21 13
13 29 24 2
14 21 17 12
15 9 18 0
16 11 10 4

5.3 cellular composition

cell_freq = table(CAR_seurat$orig.ident,CAR_seurat$seurat_clusters) %>% unlist() %>% as.data.frame()

ns <- table(organ = CAR_seurat$orig.ident, cell_type = CAR_seurat$seurat_clusters)
## remove cluster 1, 5, 6
# ns = ns[,c(1,3,4,5,8)]
fq <- prop.table(ns, 1) * 100
df <- as.data.frame(fq)

# df$cell_type = factor(df$cell_type,levels = c(2,0,4,3,7))
library(ggplot2)
ggplot(df,aes(x=Freq,y=organ,fill=cell_type))+
  geom_bar(stat="identity",colour="black")+
  # scale_fill_manual(values=c("wheat4","slategray4", "darkgreen", "lawngreen","black"))+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

rm(ns,fq,df)

6 DEGs

# find DEGs
# Idents(CAR_seurat) = "sub1"
# CAR_seurat.markers <- FindAllMarkers(CAR_seurat, only.pos = FALSE, min.pct = 0.25, logfc.threshold = 0.5)
# write.csv(CAR_seurat.markers,file = paste0(working_dir,"Dec10x_degs.csv"))
# CAR_seurat.markers = read.csv(file = paste0(working_dir,"Dec10x_degs.csv"))

# pdf(file = paste0(working_dir,"10xrun_allcells_heatmap1.pdf"),height = 20,width = 12)
# DoHeatmap(CAR_seurat,
          # features = CAR_seurat.markers %>% group_by(cluster) %>% dplyr::top_n(n = 10, wt = avg_log2FC) %>% .$gene,angle = 90)
# dev.off()

6.1 rename cluster

Idents(CAR_seurat) = "seurat_clusters"
CAR_seurat = RenameIdents(CAR_seurat,
                          "0" = "0:adi_progenitor",
                          "1" = "1:MSC",
                          "7" = "7:pre_adipocyte",
                          "3" = "3:OLC",
                          "4" = "4:OLC_progenitor",
                          "5" = "5:fibroblast",
                          "9" = "9:fibroblast",
                          "6" = "6:pericyte",
                          "10" = "10:chondrocyte",
                          "13" = "13:endothelial",
                          "12" = "12:prolifOCL",
                          "2" = "2:RBC",
                          "8" = "8:RBC",
                          "15" = "15:neutrophil",
                          "16" = "16:platelet",
                          "11" = "11:undefined",
                          "14" = "14:undefined")


# 

DimPlot(CAR_seurat,dims = c(1,3),label = TRUE)

DimPlot(CAR_seurat,dims = c(1,2),label = TRUE)

DimPlot(CAR_seurat,dims = c(2,3),label = TRUE)

6.2 doughnut figure

6.2.1 function

# The doughnut function permits to draw a donut plot
doughnut <-
function (x, labels = names(x), edges = 200, outer.radius = 0.8,
          inner.radius=0.6, clockwise = FALSE,
          init.angle = if (clockwise) 90 else 0, density = NULL,
          angle = 45, col = NULL, border = FALSE, lty = NULL,
          main = NULL, ...)
{
    if (!is.numeric(x) || any(is.na(x) | x < 0))
        stop("'x' values must be positive.")
    if (is.null(labels))
        labels <- as.character(seq_along(x))
    else labels <- as.graphicsAnnot(labels)
    x <- c(0, cumsum(x)/sum(x))
    dx <- diff(x)
    nx <- length(dx)
    plot.new()
    pin <- par("pin")
    xlim <- ylim <- c(-1, 1)
    if (pin[1L] > pin[2L])
        xlim <- (pin[1L]/pin[2L]) * xlim
    else ylim <- (pin[2L]/pin[1L]) * ylim
    plot.window(xlim, ylim, "", asp = 1)
    if (is.null(col))
        col <- if (is.null(density))
          palette()
        else par("fg")
    col <- rep(col, length.out = nx)
    border <- rep(border, length.out = nx)
    lty <- rep(lty, length.out = nx)
    angle <- rep(angle, length.out = nx)
    density <- rep(density, length.out = nx)
    twopi <- if (clockwise)
        -2 * pi
    else 2 * pi
    t2xy <- function(t, radius) {
        t2p <- twopi * t + init.angle * pi/180
        list(x = radius * cos(t2p),
             y = radius * sin(t2p))
    }
    for (i in 1L:nx) {
        n <- max(2, floor(edges * dx[i]))
        P <- t2xy(seq.int(x[i], x[i + 1], length.out = n),
                  outer.radius)
        polygon(c(P$x, 0), c(P$y, 0), density = density[i],
                angle = angle[i], border = border[i],
                col = col[i], lty = lty[i])
        Pout <- t2xy(mean(x[i + 0:1]), outer.radius)
        lab <- as.character(labels[i])
        if (!is.na(lab) && nzchar(lab)) {
            lines(c(1, 1.05) * Pout$x, c(1, 1.05) * Pout$y)
            text(1.1 * Pout$x, 1.1 * Pout$y, labels[i],
                 xpd = TRUE, adj = ifelse(Pout$x < 0, 1, 0),
                 ...)
        }
        ## Add white disc          
        Pin <- t2xy(seq.int(0, 1, length.out = n*nx),
                  inner.radius)
        polygon(Pin$x, Pin$y, density = density[i],
                angle = angle[i], border = border[i],
                col = "white", lty = lty[i])
    }

    title(main = main, ...)
    invisible(NULL)
}

7 MSC/OCL

Idents(CAR_seurat) = "seurat_clusters"
# msc_ocl_seurat = subset(msc_ocl_seurat,idents=c("0:MSC","1:MSC","3:OCL","4:OCL","7:MSC"))
msc_ocl_seurat = subset(CAR_seurat,idents=c(0,1,3,4,7)) 

DimPlot(msc_ocl_seurat,dims = c(1,2), label = TRUE)

DimPlot(msc_ocl_seurat,dims = c(1,3), label = TRUE)

DimPlot(msc_ocl_seurat,dims = c(2,3), label = TRUE)

7.1 re-do UMAP

msc_ocl_seurat = RunUMAP(msc_ocl_seurat, 
                     dims = 1:30,
                     n.neighbors = 30,
                     min.dist = 0.001,
                     n.components = 3,
                     spread = 1,
                     a = 1, 
                     b = 0.95)
## 13:03:39 Read 5702 rows and found 30 numeric columns
## 13:03:39 Using Annoy for neighbor search, n_neighbors = 30
## 13:03:39 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:03:40 Writing NN index file to temp file /tmp/RtmpHbdY32/file25085036ca16
## 13:03:40 Searching Annoy index using 1 thread, search_k = 3000
## 13:03:42 Annoy recall = 100%
## 13:03:43 Commencing smooth kNN distance calibration using 1 thread
## 13:03:45 Initializing from normalized Laplacian + noise
## 13:03:45 Commencing optimization for 500 epochs, with 246142 positive edges
## 13:03:54 Optimization finished
DimPlot(msc_ocl_seurat,dims = c(1,2), label = TRUE)

DimPlot(msc_ocl_seurat,dims = c(1,3), label = TRUE)

DimPlot(msc_ocl_seurat,dims = c(1,3), label = TRUE,split.by = "orig.ident")

DimPlot(msc_ocl_seurat,dims = c(2,3), label = TRUE)

FeaturePlot(msc_ocl_seurat,features = c("Adipoq","Lpl","Apoe","Cxcl12"),dims = c(1,3),ncol = 1)

7.2 import saved msc-ocl seurat

# saveRDS(msc_ocl_seurat,paste0(working_dir,"msc-ocl_seurat_umap0.95.rds"))
# msc_ocl_seurat = readRDS(paste0(working_dir,"msc-ocl_seurat_umap0.95.rds"))

7.2.1 rename clusters

Idents(msc_ocl_seurat) = "seurat_clusters"
msc_ocl_seurat = RenameIdents(msc_ocl_seurat,
                          "1" = "1:MSC",
                          "0" = "0:adi_progenitor",
                          "7" = "7:pre_adipocyte",
                          "4" = "4:OLC_progenitor",
                          "3" = "3:OLC")


DimPlot(msc_ocl_seurat,dims = c(1,3),split.by = "orig.ident")

8 session info

sessionInfo()